Data of 10 cultivars of rice inoculated with B. glumae or mock inoculated. Discoloration of spikelets were recorded and presented as percentage.
library(tidyverse)
library(readxl)
library(ggplot2)
rice_data <- read_excel("Spray-Data-06.18.20.xlsx",
col_types = c("text", "numeric", "numeric",
"numeric", "numeric","numeric"))
rice_data
## # A tibble: 320 x 6
## Genotype Rep `Mock_30C-22C` `Mock_30C-28C` `Pathogen_30C-2…
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 310111 1 5.88 18.6 86.5
## 2 310111 2 2.48 79.7 82.6
## 3 310111 3 2.9 61.7 100
## 4 310111 4 0 67.2 86.1
## 5 310111 5 1.76 75.4 98.6
## 6 310111 6 0 76.6 93.8
## 7 310111 7 NA NA 85.7
## 8 310111 8 NA NA 100
## 9 310111 9 NA NA 100
## 10 310111 10 NA NA 100
## # … with 310 more rows, and 1 more variable: `Pathogen_30C-28C` <dbl>
We still have to “reshape” the table to make it in longer format coding a column for treatment (Mock vs Inoculated) and temperature profile (30-22 vs 30-28).
rice_data_long <- rice_data %>%
pivot_longer(cols = c("Mock_30C-22C", "Mock_30C-28C",
"Pathogen_30C-22C", "Pathogen_30C-28C"),
names_to = "Inoculation",
values_to = "DiscPerc") %>%
separate(col = Inoculation,
sep = "_",
into = c("Inoculation", "TempProfile")) %>%
unite("Inoc_Temp", Inoculation:TempProfile, remove = FALSE)
#kableExtra::kable(rice_data_long, format = "markdown")
Separating mock from pathogen inoculated:
ggplot(data = rice_data_long, aes(x = Genotype, y = DiscPerc)) +
geom_boxplot(aes(fill = TempProfile)) +
facet_grid(. ~ Inoculation) +
coord_flip()
Looking at genotype effect:
ggplot(data = rice_data_long, aes(x = Inoc_Temp, y = DiscPerc)) +
geom_boxplot(aes(fill = TempProfile)) +
facet_wrap(Genotype ~ ., ncol = 5) +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
Since we are dealing with continous data on four different conditions with need to scale them to estimate their relationships.
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(RColorBrewer)
#Need to remove NAs
rice_data_NoNAs <- na.omit(rice_data)
#Scaling data
rice_matrix <- rice_data_NoNAs[,3:6]
row.names(rice_matrix) <- paste0(rice_data_NoNAs$Genotype,"_",rice_data_NoNAs$Rep)
## Warning: Setting row names on a tibble is deprecated.
#K-means
set.seed(123)
(km.res <- kmeans(scale(rice_matrix), 3, nstart = 10))
## K-means clustering with 3 clusters of sizes 73, 61, 13
##
## Cluster means:
## Mock_30C-22C Mock_30C-28C Pathogen_30C-22C Pathogen_30C-28C
## 1 -0.2352621 -0.3567814 -0.7556317 -0.5798089
## 2 0.4009338 -0.1933930 0.7816750 0.4881734
## 3 -0.5602173 2.9109243 0.5753030 0.9651899
##
## Clustering vector:
## 310111_1 310111_2 310111_3 310111_4 310111_5 310111_6 310131_1 310131_2
## 2 3 3 3 3 3 1 1
## 310131_3 310131_4 310131_5 310131_6 310219_1 310219_2 310219_3 310219_4
## 1 1 1 1 2 1 2 1
## 310219_5 310219_6 310219_7 310219_8 310301_1 310301_2 310301_3 310301_4
## 2 2 1 1 2 2 2 2
## 310301_5 310301_6 310301_7 310354_1 310354_2 310354_3 310354_4 310354_5
## 2 2 2 1 1 1 1 1
## 310354_6 310354_7 310645_1 310645_2 310645_3 310645_4 310645_5 310645_6
## 2 2 2 1 2 1 1 1
## 310645_7 310645_8 310747_1 310747_2 310747_3 310747_4 310747_5 310747_6
## 1 1 2 2 2 2 1 2
## 310747_7 310747_8 310802_1 310802_2 310802_3 310802_4 310802_5 310802_6
## 1 2 2 1 1 1 1 1
## 310802_7 310802_8 310998_1 310998_2 310998_3 310998_4 310998_5 310998_6
## 1 1 2 2 2 2 1 1
## 310998_7 310998_8 311078_1 311078_2 311078_3 311078_4 311078_5 311078_6
## 2 2 2 2 3 3 3 2
## 311151_1 311151_2 311151_3 311151_4 311151_5 311151_6 311151_7 311151_8
## 2 1 1 1 1 1 1 1
## 311206_1 311206_2 311206_3 311206_4 311206_5 311206_6 311206_7 311206_8
## 1 1 1 1 1 1 1 1
## 311383_1 311383_2 311383_3 311383_4 311383_5 311383_6 311383_7 311383_8
## 2 2 2 2 2 2 2 2
## 311385_1 311385_2 311385_3 311385_4 311385_5 311385_6 311385_7 311385_8
## 1 1 1 1 1 1 1 1
## 311600_1 311600_2 311600_3 311600_4 311600_5 311600_6 311600_7 311600_8
## 2 1 1 1 1 1 2 2
## 311642_1 311642_2 311642_3 311642_4 311642_5 311642_6 311642_7 311642_8
## 1 1 2 2 2 2 2 1
## 311677_1 311677_2 311677_3 311677_4 311677_5 311677_6 311677_7 311735_1
## 2 2 1 1 3 3 3 2
## 311735_2 311735_3 311735_4 311795_1 311795_2 311795_3 311795_4 311795_5
## 2 2 2 2 2 2 2 2
## 311795_6 311795_7 311795_8 301161_1 301161_2 301161_3 301161_4 301161_5
## 2 1 3 3 1 1 1 1
## 301161_6 301161_7 301161_8
## 1 1 1
##
## Within cluster sum of squares by cluster:
## [1] 111.86268 173.12423 24.91307
## (between_SS / total_SS = 46.9 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
fviz_cluster(km.res, data = rice_matrix, labelsize = 8)
#Clustering
rice_hc <- hcut(rice_matrix, 3, stand = T)
#Graphical view
p <- fviz_dend(rice_hc, rect = T, cex=0.4)
This first one is using FactoMiner,
#Need to remove NAs
rice_data_NoNAs <- na.omit(rice_data)
#Creating Matrix for analysis
rice_matrix <- rice_data_NoNAs[,3:6]
row.names(rice_matrix) <- paste0(rice_data_NoNAs$Genotype,"_",rice_data_NoNAs$Rep)
## Warning: Setting row names on a tibble is deprecated.
#PCA
rice_PCA <- prcomp(rice_matrix, center = T, scale. = T)
#Since we have 20 cultivars we need a palette with 20 colors
colors_n <- length(unique(rice_data_NoNAs$Genotype))
getPalette <- colorRampPalette(brewer.pal(9, "Dark2"))
## Warning in brewer.pal(9, "Dark2"): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors
(PCA_rice <- fviz_pca_biplot(rice_PCA, col.var = "blue",
label = "var", repel = T,
habillage = rice_data_NoNAs$Genotype, addEllipses = TRUE, ellipse.level = 0.95,
ellipse.type ="confidence") +
scale_color_manual(values = getPalette(colors_n)) +
scale_shape_manual(values = c(rep(19, 5), rep(16,5), rep(17,5), rep(18,5))))
## Scale for 'shape' is already present. Adding another scale for 'shape', which
## will replace the existing scale.
plotly::ggplotly(PCA_rice)
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
rice_wider <- rice_data %>% pivot_wider(names_from = Rep, names_sep = "_",
values_from = c("Mock_30C-22C", "Mock_30C-28C",
"Pathogen_30C-22C", "Pathogen_30C-28C")) %>%
column_to_rownames("Genotype")
rice_wider_NoNAs <- rice_data %>%
pivot_wider(names_from = Rep, names_sep = "_",
values_from = c("Mock_30C-22C", "Mock_30C-28C",
"Pathogen_30C-22C", "Pathogen_30C-28C")) %>%
column_to_rownames("Genotype")